[CUBLAS] Update wrappers to use the ILP64 API #2845
Conversation
Your PR requires formatting changes to meet the project's style guidelines. The suggested changes:

diff --git a/lib/cublas/libcublas.jl b/lib/cublas/libcublas.jl
index 28d827e4f..2c03203a1 100644
--- a/lib/cublas/libcublas.jl
+++ b/lib/cublas/libcublas.jl
@@ -5914,15 +5914,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSHgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasHSHgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSHgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::CuRef{Cfloat},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- xarray::CuPtr{Ptr{Float16}}, incx::Int64,
- beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Float16}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSHgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::CuRef{Cfloat},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ xarray::CuPtr{Ptr{Float16}}, incx::Int64,
+ beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Float16}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSSgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -5936,15 +5940,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSSgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasHSSgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSSgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::CuRef{Cfloat},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- xarray::CuPtr{Ptr{Float16}}, incx::Int64,
- beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSSgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::CuRef{Cfloat},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ xarray::CuPtr{Ptr{Float16}}, incx::Int64,
+ beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSTgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -5958,15 +5966,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSTgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasTSTgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSTgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::Ptr{Cfloat},
- Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
- xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
- beta::Ptr{Cfloat}, yarray::Ptr{Ptr{BFloat16}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSTgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::Ptr{Cfloat},
+ Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
+ xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
+ beta::Ptr{Cfloat}, yarray::Ptr{Ptr{BFloat16}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSSgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -5980,15 +5992,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSSgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasTSSgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSSgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::Ptr{Cfloat},
- Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
- xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
- beta::Ptr{Cfloat}, yarray::Ptr{Ptr{Cfloat}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSSgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::Ptr{Cfloat},
+ Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
+ xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
+ beta::Ptr{Cfloat}, yarray::Ptr{Ptr{Cfloat}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSHgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -6006,19 +6022,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSHgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasHSHgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSHgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{Float16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{Float16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Float16}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSHgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{Float16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{Float16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Float16}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSSgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -6036,19 +6056,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSSgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasHSSgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSSgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{Float16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{Float16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Cfloat}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSSgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{Float16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{Float16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Cfloat}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSTgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -6066,19 +6090,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSTgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasTSTgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSTgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{BFloat16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{BFloat16}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSTgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{BFloat16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{BFloat16}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSSgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -6096,19 +6124,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSSgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasTSSgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSSgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{BFloat16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Cfloat}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSSgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{BFloat16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Cfloat}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
@@ -6121,14 +6153,18 @@ end
C::Ptr{Float16}, ldc::Cint)::cublasStatus_t
end
-@checked function cublasHgemm_64(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
- C, ldc)
+@checked function cublasHgemm_64(
+ handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+ C, ldc
+ )
initialize_context()
- @ccall libcublas.cublasHgemm_64(handle::cublasHandle_t, transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64, k::Int64,
- alpha::Ptr{Float16}, A::Ptr{Float16}, lda::Int64,
- B::Ptr{Float16}, ldb::Int64, beta::Ptr{Float16},
- C::Ptr{Float16}, ldc::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHgemm_64(
+ handle::cublasHandle_t, transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64, k::Int64,
+ alpha::Ptr{Float16}, A::Ptr{Float16}, lda::Int64,
+ B::Ptr{Float16}, ldb::Int64, beta::Ptr{Float16},
+ C::Ptr{Float16}, ldc::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemmBatched(handle, transa, transb, m, n, k, alpha, Aarray, lda,
@@ -6144,17 +6180,21 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHgemmBatched_64(handle, transa, transb, m, n, k, alpha, Aarray, lda,
- Barray, ldb, beta, Carray, ldc, batchCount)
+@checked function cublasHgemmBatched_64(
+ handle, transa, transb, m, n, k, alpha, Aarray, lda,
+ Barray, ldb, beta, Carray, ldc, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHgemmBatched_64(handle::cublasHandle_t, transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64,
- k::Int64, alpha::CuRef{Float16},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- Barray::CuPtr{Ptr{Float16}}, ldb::Int64,
- beta::CuRef{Float16},
- Carray::CuPtr{Ptr{Float16}}, ldc::Int64,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHgemmBatched_64(
+ handle::cublasHandle_t, transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64,
+ k::Int64, alpha::CuRef{Float16},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ Barray::CuPtr{Ptr{Float16}}, ldb::Int64,
+ beta::CuRef{Float16},
+ Carray::CuPtr{Ptr{Float16}}, ldc::Int64,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemmStridedBatched(handle, transa, transb, m, n, k, alpha, A, lda,
@@ -6173,18 +6213,22 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHgemmStridedBatched_64(handle, transa, transb, m, n, k, alpha, A, lda,
- strideA, B, ldb, strideB, beta, C, ldc, strideC,
- batchCount)
- initialize_context()
- @ccall libcublas.cublasHgemmStridedBatched_64(handle::cublasHandle_t,
- transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64,
- k::Int64, alpha::CuRef{Float16},
- A::CuPtr{Float16}, lda::Int64,
- strideA::Clonglong, B::CuPtr{Float16},
- ldb::Int64, strideB::Clonglong,
- beta::CuRef{Float16}, C::CuPtr{Float16},
- ldc::Int64, strideC::Clonglong,
- batchCount::Int64)::cublasStatus_t
+@checked function cublasHgemmStridedBatched_64(
+ handle, transa, transb, m, n, k, alpha, A, lda,
+ strideA, B, ldb, strideB, beta, C, ldc, strideC,
+ batchCount
+ )
+ initialize_context()
+ @ccall libcublas.cublasHgemmStridedBatched_64(
+ handle::cublasHandle_t,
+ transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64,
+ k::Int64, alpha::CuRef{Float16},
+ A::CuPtr{Float16}, lda::Int64,
+ strideA::Clonglong, B::CuPtr{Float16},
+ ldb::Int64, strideB::Clonglong,
+ beta::CuRef{Float16}, C::CuPtr{Float16},
+ ldc::Int64, strideC::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
diff --git a/lib/cublas/wrappers.jl b/lib/cublas/wrappers.jl
index 241e66a82..4a61e2b29 100644
--- a/lib/cublas/wrappers.jl
+++ b/lib/cublas/wrappers.jl
@@ -1116,7 +1116,7 @@ end
## (GE) general matrix-matrix multiplication
for (fname, fname_64, elty) in ((:cublasDgemm_v2, :cublasDgemm_v2_64, :Float64),
(:cublasSgemm_v2, :cublasSgemm_v2_64, :Float32),
- (:cublasHgemm, :cublasHgemm_64, :Float16),
+ (:cublasHgemm, :cublasHgemm_64, :Float16),
(:cublasZgemm_v2, :cublasZgemm_v2_64, :ComplexF64),
(:cublasCgemm_v2, :cublasCgemm_v2_64, :ComplexF32))
@eval begin
@@ -1527,7 +1527,7 @@ end
## (GE) general matrix-matrix multiplication batched
for (fname, fname_64, elty) in ((:cublasDgemmBatched, :cublasDgemmBatched_64, :Float64),
(:cublasSgemmBatched, :cublasSgemmBatched_64, :Float32),
- (:cublasHgemmBatched, :cublasHgemmBatched_64, :Float16),
+ (:cublasHgemmBatched, :cublasHgemmBatched_64, :Float16),
(:cublasZgemmBatched, :cublasZgemmBatched_64, :ComplexF64),
(:cublasCgemmBatched, :cublasCgemmBatched_64, :ComplexF32))
@eval begin
@@ -1594,7 +1594,7 @@ end
## (GE) general matrix-matrix multiplication strided batched
for (fname, fname_64, elty) in ((:cublasDgemmStridedBatched, :cublasDgemmStridedBatched_64, :Float64),
(:cublasSgemmStridedBatched, :cublasSgemmStridedBatched_64, :Float32),
- (:cublasHgemmStridedBatched, :cublasHgemmStridedBatched_64, :Float16),
+ (:cublasHgemmStridedBatched, :cublasHgemmStridedBatched_64, :Float16),
(:cublasZgemmStridedBatched, :cublasZgemmStridedBatched_64, :ComplexF64),
(:cublasCgemmStridedBatched, :cublasCgemmStridedBatched_64, :ComplexF32))
@eval begin
@@ -1946,10 +1946,12 @@ end
## (TR) Triangular matrix and vector multiplication and solution
for (mmname, mmname_64, elty) in
- ((:cublasDtrmm_v2, :cublasDtrmm_v2_64, :Float64),
- (:cublasStrmm_v2, :cublasStrmm_v2_64, :Float32),
- (:cublasZtrmm_v2, :cublasZtrmm_v2_64, :ComplexF64),
- (:cublasCtrmm_v2, :cublasCtrmm_v2_64, :ComplexF32))
+ (
+ (:cublasDtrmm_v2, :cublasDtrmm_v2_64, :Float64),
+ (:cublasStrmm_v2, :cublasStrmm_v2_64, :Float32),
+ (:cublasZtrmm_v2, :cublasZtrmm_v2_64, :ComplexF64),
+ (:cublasCtrmm_v2, :cublasCtrmm_v2_64, :ComplexF32),
+ )
@eval begin
# Note: CUBLAS differs from BLAS API for trmm
# BLAS: inplace modification of B
@@ -1983,10 +1985,12 @@ for (mmname, mmname_64, elty) in
end
for (smname, smname_64, elty) in
- ((:cublasDtrsm_v2, :cublasDtrsm_v2_64, :Float64),
- (:cublasStrsm_v2, :cublasStrsm_v2_64, :Float32),
- (:cublasZtrsm_v2, :cublasZtrsm_v2_64, :ComplexF64),
- (:cublasCtrsm_v2, :cublasCtrsm_v2_64, :ComplexF32))
+ (
+ (:cublasDtrsm_v2, :cublasDtrsm_v2_64, :Float64),
+ (:cublasStrsm_v2, :cublasStrsm_v2_64, :Float32),
+ (:cublasZtrsm_v2, :cublasZtrsm_v2_64, :ComplexF64),
+ (:cublasCtrsm_v2, :cublasCtrsm_v2_64, :ComplexF32),
+ )
@eval begin
function trsm!(side::Char,
uplo::Char,
diff --git a/res/wrap/libcublas_epilogue.jl b/res/wrap/libcublas_epilogue.jl
index f77958580..e32e20bfe 100644
--- a/res/wrap/libcublas_epilogue.jl
+++ b/res/wrap/libcublas_epilogue.jl
@@ -11,15 +11,19 @@
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSHgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasHSHgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSHgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::CuRef{Cfloat},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- xarray::CuPtr{Ptr{Float16}}, incx::Int64,
- beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Float16}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSHgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::CuRef{Cfloat},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ xarray::CuPtr{Ptr{Float16}}, incx::Int64,
+ beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Float16}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSSgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -33,15 +37,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSSgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasHSSgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSSgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::CuRef{Cfloat},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- xarray::CuPtr{Ptr{Float16}}, incx::Int64,
- beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSSgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::CuRef{Cfloat},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ xarray::CuPtr{Ptr{Float16}}, incx::Int64,
+ beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSTgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -55,15 +63,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSTgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasTSTgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSTgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::Ptr{Cfloat},
- Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
- xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
- beta::Ptr{Cfloat}, yarray::Ptr{Ptr{BFloat16}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSTgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::Ptr{Cfloat},
+ Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
+ xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
+ beta::Ptr{Cfloat}, yarray::Ptr{Ptr{BFloat16}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSSgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -77,15 +89,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSSgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasTSSgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSSgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::Ptr{Cfloat},
- Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
- xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
- beta::Ptr{Cfloat}, yarray::Ptr{Ptr{Cfloat}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSSgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::Ptr{Cfloat},
+ Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
+ xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
+ beta::Ptr{Cfloat}, yarray::Ptr{Ptr{Cfloat}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSHgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -103,19 +119,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSHgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasHSHgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSHgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{Float16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{Float16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Float16}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSHgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{Float16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{Float16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Float16}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSSgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -133,19 +153,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSSgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasHSSgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSSgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{Float16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{Float16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Cfloat}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSSgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{Float16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{Float16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Cfloat}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSTgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -163,19 +187,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSTgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasTSTgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSTgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{BFloat16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{BFloat16}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSTgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{BFloat16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{BFloat16}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSSgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -193,19 +221,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSSgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasTSSgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSSgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{BFloat16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Cfloat}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSSgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{BFloat16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Cfloat}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
@@ -218,14 +250,18 @@ end
C::Ptr{Float16}, ldc::Cint)::cublasStatus_t
end
-@checked function cublasHgemm_64(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
- C, ldc)
+@checked function cublasHgemm_64(
+ handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+ C, ldc
+ )
initialize_context()
- @ccall libcublas.cublasHgemm_64(handle::cublasHandle_t, transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64, k::Int64,
- alpha::Ptr{Float16}, A::Ptr{Float16}, lda::Int64,
- B::Ptr{Float16}, ldb::Int64, beta::Ptr{Float16},
- C::Ptr{Float16}, ldc::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHgemm_64(
+ handle::cublasHandle_t, transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64, k::Int64,
+ alpha::Ptr{Float16}, A::Ptr{Float16}, lda::Int64,
+ B::Ptr{Float16}, ldb::Int64, beta::Ptr{Float16},
+ C::Ptr{Float16}, ldc::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemmBatched(handle, transa, transb, m, n, k, alpha, Aarray, lda,
@@ -241,17 +277,21 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHgemmBatched_64(handle, transa, transb, m, n, k, alpha, Aarray, lda,
- Barray, ldb, beta, Carray, ldc, batchCount)
+@checked function cublasHgemmBatched_64(
+ handle, transa, transb, m, n, k, alpha, Aarray, lda,
+ Barray, ldb, beta, Carray, ldc, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHgemmBatched_64(handle::cublasHandle_t, transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64,
- k::Int64, alpha::CuRef{Float16},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- Barray::CuPtr{Ptr{Float16}}, ldb::Int64,
- beta::CuRef{Float16},
- Carray::CuPtr{Ptr{Float16}}, ldc::Int64,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHgemmBatched_64(
+ handle::cublasHandle_t, transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64,
+ k::Int64, alpha::CuRef{Float16},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ Barray::CuPtr{Ptr{Float16}}, ldb::Int64,
+ beta::CuRef{Float16},
+ Carray::CuPtr{Ptr{Float16}}, ldc::Int64,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemmStridedBatched(handle, transa, transb, m, n, k, alpha, A, lda,
@@ -270,18 +310,22 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHgemmStridedBatched_64(handle, transa, transb, m, n, k, alpha, A, lda,
- strideA, B, ldb, strideB, beta, C, ldc, strideC,
- batchCount)
+@checked function cublasHgemmStridedBatched_64(
+ handle, transa, transb, m, n, k, alpha, A, lda,
+ strideA, B, ldb, strideB, beta, C, ldc, strideC,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHgemmStridedBatched_64(handle::cublasHandle_t,
- transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64,
- k::Int64, alpha::CuRef{Float16},
- A::CuPtr{Float16}, lda::Int64,
- strideA::Clonglong, B::CuPtr{Float16},
- ldb::Int64, strideB::Clonglong,
- beta::CuRef{Float16}, C::CuPtr{Float16},
- ldc::Int64, strideC::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHgemmStridedBatched_64(
+ handle::cublasHandle_t,
+ transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64,
+ k::Int64, alpha::CuRef{Float16},
+ A::CuPtr{Float16}, lda::Int64,
+ strideA::Clonglong, B::CuPtr{Float16},
+ ldb::Int64, strideB::Clonglong,
+ beta::CuRef{Float16}, C::CuPtr{Float16},
+ ldc::Int64, strideC::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
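For context on what these `_64` wrappers are: the ILP64 API pairs each legacy cuBLAS entry point that takes `Cint` sizes with a `_64` variant taking `Int64`. Below is a minimal, hypothetical sketch (not CUDA.jl's actual wrapper code) of the dispatch idea, choosing the `_64` entry point once a dimension no longer fits in a 32-bit `Cint`:

```julia
# Hypothetical illustration only: pick the ILP64 ("_64") cuBLAS entry point
# whenever a dimension would overflow the 32-bit Cint used by the legacy API.
function gemm_entrypoint(m::Integer, n::Integer, k::Integer)
    fits_in_cint = all(x -> x <= typemax(Cint), (m, n, k))
    return fits_in_cint ? :cublasHgemm : :cublasHgemm_64
end

gemm_entrypoint(1024, 1024, 1024)  # => :cublasHgemm
gemm_entrypoint(2^31, 16, 16)      # => :cublasHgemm_64
```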
CUDA.jl Benchmarks
| Benchmark suite | Current: fa318bf | Previous: c05359d | Ratio |
|---|---|---|---|
| latency/precompile | 42941278681 ns | 42922336650.5 ns | 1.00 |
| latency/ttfp | 7012501535 ns | 7015168424 ns | 1.00 |
| latency/import | 3593901953 ns | 3571269514 ns | 1.01 |
| integration/volumerhs | 9625712 ns | 9608723 ns | 1.00 |
| integration/byval/slices=1 | 147326 ns | 146920.5 ns | 1.00 |
| integration/byval/slices=3 | 426789 ns | 425845 ns | 1.00 |
| integration/byval/reference | 145182 ns | 145020 ns | 1.00 |
| integration/byval/slices=2 | 286848 ns | 286380 ns | 1.00 |
| integration/cudadevrt | 103616 ns | 103554 ns | 1.00 |
| kernel/indexing | 14395 ns | 14235 ns | 1.01 |
| kernel/indexing_checked | 15083 ns | 14711 ns | 1.03 |
| kernel/occupancy | 674.2675159235669 ns | 672.5506329113924 ns | 1.00 |
| kernel/launch | 2273.222222222222 ns | 2270.3333333333335 ns | 1.00 |
| kernel/rand | 18135 ns | 14669 ns | 1.24 |
| array/reverse/1d | 20027 ns | 19682 ns | 1.02 |
| array/reverse/2d | 25187 ns | 23613.5 ns | 1.07 |
| array/reverse/1d_inplace | 10411 ns | 10461 ns | 1.00 |
| array/reverse/2d_inplace | 12136 ns | 13212 ns | 0.92 |
| array/copy | 20893 ns | 20972 ns | 1.00 |
| array/iteration/findall/int | 157185 ns | 157808 ns | 1.00 |
| array/iteration/findall/bool | 139596 ns | 139837 ns | 1.00 |
| array/iteration/findfirst/int | 163592 ns | 164937 ns | 0.99 |
| array/iteration/findfirst/bool | 164983 ns | 165868 ns | 0.99 |
| array/iteration/scalar | 71931.5 ns | 73041 ns | 0.98 |
| array/iteration/logical | 215679 ns | 214850 ns | 1.00 |
| array/iteration/findmin/1d | 46426.5 ns | 46704 ns | 0.99 |
| array/iteration/findmin/2d | 96415 ns | 96962.5 ns | 0.99 |
| array/reductions/reduce/Int64/1d | 43753 ns | 46033 ns | 0.95 |
| array/reductions/reduce/Int64/dims=1 | 47827.5 ns | 55193 ns | 0.87 |
| array/reductions/reduce/Int64/dims=2 | 62970.5 ns | 62917 ns | 1.00 |
| array/reductions/reduce/Int64/dims=1L | 89100 ns | 88869 ns | 1.00 |
| array/reductions/reduce/Int64/dims=2L | 88718 ns | 87079 ns | 1.02 |
| array/reductions/reduce/Float32/1d | 34717.5 ns | 34606 ns | 1.00 |
| array/reductions/reduce/Float32/dims=1 | 52089 ns | 43875 ns | 1.19 |
| array/reductions/reduce/Float32/dims=2 | 60095 ns | 59705 ns | 1.01 |
| array/reductions/reduce/Float32/dims=1L | 52647 ns | 52260 ns | 1.01 |
| array/reductions/reduce/Float32/dims=2L | 70580.5 ns | 70051.5 ns | 1.01 |
| array/reductions/mapreduce/Int64/1d | 43883.5 ns | 42671.5 ns | 1.03 |
| array/reductions/mapreduce/Int64/dims=1 | 53014.5 ns | 45980 ns | 1.15 |
| array/reductions/mapreduce/Int64/dims=2 | 62792.5 ns | 62143.5 ns | 1.01 |
| array/reductions/mapreduce/Int64/dims=1L | 89136 ns | 88812 ns | 1.00 |
| array/reductions/mapreduce/Int64/dims=2L | 87839 ns | 86818 ns | 1.01 |
| array/reductions/mapreduce/Float32/1d | 34925 ns | 34742 ns | 1.01 |
| array/reductions/mapreduce/Float32/dims=1 | 42033 ns | 43090.5 ns | 0.98 |
| array/reductions/mapreduce/Float32/dims=2 | 60170 ns | 60061 ns | 1.00 |
| array/reductions/mapreduce/Float32/dims=1L | 53055 ns | 52528 ns | 1.01 |
| array/reductions/mapreduce/Float32/dims=2L | 70741 ns | 70191 ns | 1.01 |
| array/broadcast | 20137 ns | 20155 ns | 1.00 |
| array/copyto!/gpu_to_gpu | 12817 ns | 11294 ns | 1.13 |
| array/copyto!/cpu_to_gpu | 215702 ns | 216503 ns | 1.00 |
| array/copyto!/gpu_to_cpu | 283299 ns | 284237 ns | 1.00 |
| array/accumulate/Int64/1d | 125699 ns | 125529 ns | 1.00 |
| array/accumulate/Int64/dims=1 | 83460 ns | 84037 ns | 0.99 |
| array/accumulate/Int64/dims=2 | 158136 ns | 159166 ns | 0.99 |
| array/accumulate/Int64/dims=1L | 1709339 ns | 1720376 ns | 0.99 |
| array/accumulate/Int64/dims=2L | 966391 ns | 968348 ns | 1.00 |
| array/accumulate/Float32/1d | 110019 ns | 109984 ns | 1.00 |
| array/accumulate/Float32/dims=1 | 80843 ns | 81082 ns | 1.00 |
| array/accumulate/Float32/dims=2 | 148409.5 ns | 148760 ns | 1.00 |
| array/accumulate/Float32/dims=1L | 1618389 ns | 1629307.5 ns | 0.99 |
| array/accumulate/Float32/dims=2L | 698983 ns | 701479 ns | 1.00 |
| array/construct | 1330.3 ns | 1287.2 ns | 1.03 |
| array/random/randn/Float32 | 44727.5 ns | 44176 ns | 1.01 |
| array/random/randn!/Float32 | 25125 ns | 24930 ns | 1.01 |
| array/random/rand!/Int64 | 27517 ns | 27547 ns | 1.00 |
| array/random/rand!/Float32 | 8727 ns | 8724.666666666666 ns | 1.00 |
| array/random/rand/Int64 | 30180 ns | 30114 ns | 1.00 |
| array/random/rand/Float32 | 13154 ns | 13059 ns | 1.01 |
| array/permutedims/4d | 60328 ns | 60761 ns | 0.99 |
| array/permutedims/2d | 54408 ns | 54037 ns | 1.01 |
| array/permutedims/3d | 55270 ns | 54954 ns | 1.01 |
| array/sorting/1d | 2757342.5 ns | 2756544 ns | 1.00 |
| array/sorting/by | 3344342 ns | 3343249 ns | 1.00 |
| array/sorting/2d | 1080760 ns | 1080799 ns | 1.00 |
| cuda/synchronization/stream/auto | 1039.5 ns | 1040.3 ns | 1.00 |
| cuda/synchronization/stream/nonblocking | 7651.9 ns | 7220 ns | 1.06 |
| cuda/synchronization/stream/blocking | 814.8390804597701 ns | 802.3333333333334 ns | 1.02 |
| cuda/synchronization/context/auto | 1182 ns | 1203.5 ns | 0.98 |
| cuda/synchronization/context/nonblocking | 8322.7 ns | 7276.700000000001 ns | 1.14 |
| cuda/synchronization/context/blocking | 930.6428571428571 ns | 900.4347826086956 ns | 1.03 |
This comment was automatically generated by workflow using github-action-benchmark.
Codecov Report

✅ All modified and coverable lines are covered by tests.

Additional details and impacted files:

@@            Coverage Diff            @@
##           master    #2845     +/-   ##
==========================================
+ Coverage   77.39%   89.64%   +12.25%
==========================================
  Files         150      150
  Lines       13124    13237     +113
==========================================
+ Hits        10157    11866    +1709
+ Misses       2967     1371    -1596

☔ View full report in Codecov by Sentry.
I checked the symbols with
nm -D .../libcusolver.so
and it seems that they are in the library.
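If it helps, the same kind of check can be repeated from Julia. This is a minimal sketch only; the library soname and the symbols listed are illustrative and may need adjusting for a local installation:

```julia
# Minimal sketch: probe a shared library for the ILP64 ("_64") entry points.
# The soname below is illustrative; substitute the full path of the library
# shipped with your CUDA installation if it is not on the loader path.
using Libdl

lib = Libdl.dlopen("libcublas.so")
for sym in (:cublasHgemm_64, :cublasHgemmBatched_64, :cublasHSHgemvBatched_64)
    ptr = Libdl.dlsym(lib, sym; throw_error = false)
    println(sym, " => ", ptr === nothing ? "missing" : "exported")
end
Libdl.dlclose(lib)
```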